
	* produce table of summary statistics
	
	***Inputs: 
	* $Data/mainsample.dta
	
	
	***Outputs: 
	* $Results/summstats.tex
	
	
	* Load the main sample. The path is quoted so that a $Data directory
	* containing spaces does not break the command.
	use "$Data/mainsample.dta", clear

	* Per-user aggregates, computed before the data are reduced to one row per user:
	*   maxplay = largest nth_play observed for the user
	*   meanpm  = user's mean of weighted_dailypm
	bysort anon_id: egen maxplay = max(nth_play)
	bysort anon_id: egen meanpm = mean(weighted_dailypm)

	* Flag one observation per anon_id and keep only that row, so every
	* statistic below is computed over users rather than over observations.
	* Which row is kept within a user is arbitrary; the per-user aggregates
	* above are constant within anon_id, so they are unaffected.
	bysort anon_id: gen firstob = _n == 1
	keep if firstob == 1
	
			
	
	* Collapse education_level into three groups:
	*   1 = codes 1, 2, 3, 8   ("< College")
	*   2 = code 4             ("Bachelor")
	*   3 = codes 5, 6, 7      ("Post-graduate")
	* Any other value (including missing) leaves educthree missing.
	gen educthree = .
	replace educthree = 1 if inlist(education_level, 1, 2, 3, 8)
	replace educthree = 2 if education_level == 4
	replace educthree = 3 if inlist(education_level, 5, 6, 7)

	label define educthree 1 "< College" 2 "Bachelor" 3 "Post-graduate"
	label values educthree educthree

	* One 0/1 dummy per category: educlevels1, educlevels2, educlevels3.
	quietly tabulate educthree, generate(educlevels)

	* Variable labels used as row names by esttab's -label- option.
	label variable age "Age"
	label variable weighted_dailypm "Daily PM2.5"
	* meanpm (not weighted_dailypm) is the variable reported in the table;
	* egen does not carry the source label over, so label it explicitly.
	label variable meanpm "Mean Daily PM2.5"
	label variable male "Male"
	label variable maxplay "Maximum Play per Game"

	* Subscriber indicator derived from the string user_type.
	* encode numbers the distinct strings 1, 2, ... in alphabetical order,
	* so paid equals 1 for the alphabetically first category and 0 for the second.
	* NOTE(review): confirm that the alphabetically first value of user_type is
	* the paid tier and that user_type has exactly two values; otherwise paid is
	* inverted or takes values outside 0/1.
	encode user_type, gen(free)
	gen paid = 1 - (free - 1)
	label variable paid "Paid Subscriber"

	* Age-threshold indicators. The full variable name age_sig is spelled out in
	* both conditions instead of relying on variable-name abbreviation.
	* NOTE(review): observations with missing age_sig are coded 0 here, not
	* missing, which differs from how college is handled later — confirm intended.
	gen above50 = age_sig >= 50 & age_sig < .
	gen above65 = age_sig >= 65 & age_sig < .
	label variable above50 "Age $\geq$ 50"
	label variable above65 "Age $\geq$ 65"
	
	* Post the mean and standard deviation of each summary variable so that
	* esttab can read them.
	estpost tabstat age above50 above65 male meanpm maxplay educlevels* paid, ///
		statistics(mean sd) columns(statistics)

	* Write the LaTeX table: means in the main cell, standard deviations as the
	* auxiliary statistic, variable labels as row names. The path is quoted so
	* that a $Results directory containing spaces does not break the command.
	esttab using "$Results/summstats.tex", main(mean) aux(sd) nostar unstack ///
		nonote label replace


			* College indicator: 1 when educthree is 2 or 3, 0 when it is 1,
			* missing when educthree is missing.
			gen college = educthree ~= 1
			replace college = . if educthree == .

	* Keep the sample mean and standard deviation of each indicator in locals
	* (mean_male, sd_male, mean_above65, ...).
	foreach var in male above65 college {
		summ `var'
		local mean_`var' = r(mean)
		local sd_`var' = r(sd)
	}

	* One-sample t-tests of the sample shares against US population benchmarks.
	* above65 and college are 0/1 indicators (male is assumed to be 0/1 as well —
	* confirm), so the benchmarks must be proportions, not percentages: the
	* percent figures 50.8, 16.5 and 32.1 are divided by 100.
	* The male benchmark is written as 1 - 0.508, presumably one minus the
	* female share — confirm against the source of the figure.
	ttest male == 1 - 0.508 // US population

	ttest above65 == 0.165 // US population

	ttest college == 0.321 // US population

	
	
	
	
	
